clear all
cd "../data"
set maxvar 32767

*--------------------------------------------------------------------*
* Clean Up Data 
*--------------------------------------------------------------------*
/*
** Clean up Daily Returns
use "crsp_daily_returns.dta", clear
rename *, lower
sort permno date
forvalues i = 1/30{
	gen ret_m`i' = ret[_n-`i'] if permno == permno[_n-`i']
	gen vwretd_m`i' = vwretd[_n-`i'] if permno == permno[_n-`i']
	gen abret_m`i' = ret_m`i' - vwretd_m`i'
}
* volatility t-1 to t-5
egen volatility_m1_m5 = rowsd(ret_m1 ret_m2 ret_m3 ret_m4 ret_m5)
* abret t-1 to t-5
gen abret_m1_m5 = abret_m1
forvalues i = 2/5{
	replace abret_m1_m5 = (abret_m1_m5+1) * (abret_m`i'+1) - 1
}
* abret t-6 to t-30
gen abret_m6_m30 = abret_m6
forvalues i = 6/30{
	replace abret_m6_m30 = (abret_m6_m30+1) * (abret_m`i'+1) - 1
}
keep permno date volatility_m1_m5 abret_m1_m5 abret_m6_m30
save "TEMP/crsp_daily_returns_clean.dta", replace

** Clean up Markit data
use "Markit_data.dta", clear
rename datadate markit_date
gen dow = dow(markit_date)
gen date = markit_date
replace date = markit_date - 4 if dow == 1 | dow == 2 
replace date = markit_date - 2 if dow == 4 | dow == 5 | dow == 3
drop dow
drop if cusip == ""
replace cusip = substr(cusip,1,length(cusip)-1)
duplicates drop date cusip, force
save "TEMP/Markit_data_clean.dta", replace

use "Fees.dta", clear
rename *, lower
rename datadate markit_date
gen dow = dow(markit_date)
gen date = markit_date
replace date = markit_date - 4 if dow == 1 | dow == 2 
replace date = markit_date - 2 if dow == 4 | dow == 5 | dow == 3
drop dow
drop if cusip == ""
replace cusip = substr(cusip,1,length(cusip)-1)
duplicates drop date cusip, force
save "TEMP/Fees_clean.dta", replace

** CAR decomposiiton 
use "ReturnDecomposition_byFilingType.dta", clear
rename subjectgvkey gvkey
rename fdate filing_date
collapse (mean) treatment_d stockpicking selection_d , by(filing_date gvkey)
save "TEMP/ReturnDecomposition_byFilingType_mergeready.dta", replace

*-------------------------------*
* Generate the final dataset
*-------------------------------*
use "sc13_2010_2018_quantity.dta", clear
collapse (max) itrade (min) filing_date (sum) quantity ivol, by(permno date)
order permno date filing_date
gen diff_days = filing_date - date
drop if diff_days < 0 | diff_days > 60
gen sc13d_60day = 1
* code up itrade to be 1 if quantity is != 0 
drop itrade
gen itrade = quantity != 0 

* merges with disagreement data
merge m:1 permno date using "StockTwits_all.dta"
drop if _merge == 1
drop _merge
replace sc13d_60day = 0 if sc13d_60day == .

sort permno filing_date date
** Merge on CRSP daily data
merge m:1 permno date using "crsp_daily_trading.dta"
drop if _merge == 2
drop _merge
drop to
gen to = vol/(shrout*1000)
gen act_vol = abs(quantity)
replace act_vol = 0 if act_vol == . 
replace itrade = 0 if itrade == . 
* cakcykate ex-activist vol that is vol minus activist vol
* we want to make sure we have all the variables for all observations
drop if shrout == . 
gen exact_vol = vol - act_vol
gen act_to = act_vol/(shrout*1000) 
* drop if overall activist to is > 1 --- drops two events
egen frac_aquired = sum(act_to), by(permno filing_date)
drop if frac_aquired > 1 
drop frac_aquired
* extrac the ex-activist vol that is orthogonal to activist vol
reg to act_to
predict double exact_resid_to, residuals

** Winsorize Variables 
winsor to, generate(tow) p(0.001)
winsor ret, generate(retw) p(0.001)
winsor exact_resid_to, generate(exact_resid_tow) p(0.001)
winsor act_to if sc13d_60day == 1, generate(act_tow) p(0.001)
replace act_tow = 0 if act_to != . & act_tow == . 

** merge on news data
merge m:1 permno date using "DJNW.dta"
drop if _merge==2
drop _merge
replace num_articles = 0 if num_articles == .
gen ind_articles = (num_articles > 0)

** merge on Technical Disagreement and self-classified messages
merge 1:1 permno date using "StockTwits_tech.dta"
drop if _m == 2
drop _m

** standardize variables
foreach var of varlist disagreement  disagreement_night num_messages num_messages_night num_messages_tech disagreement_tech{
  norm `var' , method(zee)
}

** generate lags
sort permno date
foreach var of varlist zee_disagreement_night zee_disagreement zee_num_messages zee_num_messages_night zee_disagreement_tech zee_num_messages_tech{
	forvalues i = 2/5{
		gen `var'_m`i' = `var'[_n-`i'] if permno == permno[_n-`i']
	}
}

** merge on permno/cusip link
gen year = year(date)
merge m:1 permno year using "permno_cusip_links.dta"
drop if _m == 2
drop _m
drop year

** merge on Markit data
merge m:1 cusip date using "TEMP/Markit_data_clean.dta"
drop if _m == 2
drop _m 
merge m:1 cusip date using "TEMP/Fees_clean.dta"
drop if _m == 2
drop _m
gen frac_loan = shortloanquantity/(shrout*1000)
replace utilisation = 100 if utilisation > 100 & utilisation != . 
replace indicativefee = 1 if indicativefee > 1 & indicativefee != .
* change in fraction on loan 
sort permno date
gen diff_util = utilisation - utilisation[_n-1] if permno == permno[_n-1]
gen diff_frac_loan = frac_loan - frac_loan[_n-1] if permno == permno[_n-1]
gen high_diff_frac_loan = .
replace high_diff_frac_loan = 1 if diff_frac_loan >= 0.01 & diff_frac_loan != . 
replace high_diff_frac_loan = 0 if diff_frac_loan < 0.01  

* merge on CAR decomposition
merge m:1 permno using "gvkey_permno_links.dta"
drop if _m == 2
drop _m
destring gvkey, replace
merge m:1 gvkey filing_date using "TEMP/ReturnDecomposition_byFilingType_mergeready.dta"
drop if _m == 2
drop _m

** high utilisation
sort permno date
forvalues i = 1/5{
	gen utilisation_m`i' = utilisation[_n-`i'] if permno == permno[_n-`i']
	gen indicativefee_m`i' = indicativefee[_n-`i'] if permno == permno[_n-`i']
}
egen mean_utilisation_m1_5 = rowmean(utilisation_m1 utilisation_m2 utilisation_m3 utilisation_m4 utilisation_m5)
gen high_mean_util_mw1 = .
replace high_mean_util_mw1 = 1 if mean_utilisation_m1_5 >= 60 & mean_utilisation_m1_5 != . 
replace high_mean_util_mw1 = 0 if mean_utilisation_m1_5 < 60
drop utilisation_m1 utilisation_m2 utilisation_m3 utilisation_m4 utilisation_m5

** high fees
egen mean_fee_m1_5 = rowmean(indicativefee_m1 indicativefee_m2 indicativefee_m3 indicativefee_m4 indicativefee_m5)
gen high_mean_fee_mw1 = .
replace high_mean_fee_mw1 = 1 if mean_fee_m1_5 >= 0.1 & mean_fee_m1_5 != . 
replace high_mean_fee_mw1 = 0 if mean_fee_m1_5 < 0.1
drop indicativefee_m1 indicativefee_m2 indicativefee_m3 indicativefee_m4 indicativefee_m5

** add returns
merge 1:1 permno date using "TEMP/crsp_daily_returns_clean.dta"
drop if _m == 2
drop _m 

** generate lags
sort permno date
foreach var of varlist tow retw exact_resid_tow act_tow itrade zee_disagreement zee_disagreement_tech  num_messages zee_num_messages  zee_num_messages_tech  high_diff_frac_loan{
	gen `var'_m1 = `var'[_n-1] if permno == permno[_n-1]
}

** multiply by 100
foreach var of varlist tow act_tow exact_resid_tow tow_m1 act_tow_m1 itrade itrade_m1 retw high_diff_frac_loan high_diff_frac_loan_m1{
	gen `var'_100 = `var' * 100
}

save "main_data.dta", replace
*/


*--------------------------------------------------------------------*
* Analysis 
*--------------------------------------------------------------------*

*** NOTE: "main_data_jfqa.dta" was created using the cleaning code above, but from the full datasets, and then reshuffled. We kept only enough observatoins to be able to run the analysis code. 

*---------------------------------------*
* FIGURES
*---------------------------------------*
*--------------*
* Figure 1
*--------------*
** To produce Figure 1 the program calls two R scripts
** Panel a) 
shell "textual_mentions_with_tech.R"
** Panel b)
shell "Rscript short_mentions.R"

*--------------*
* Figures 2+3
*--------------*
use "main_data_jfqa.dta", clear
keep if sc13d_60day == 1
collapse (sum) frac_acquired=act_to (mean)  tow zee_disagreement treatment_d stockpicking selection_d, by(permno filing_date)
** disagreement quintiles
egen terciles = xtile(zee_disagreement ), nq(3)
drop if frac_acquired > 1
gen total_car = treatment_d+stockpicking+selection_d
collapse (mean) frac_acquired tow total_car treatment_d (count) numobs=frac_acquired (sd) total_car_sd=total_car treatment_d_sd=treatment_d frac_acquired_sd = frac_acquired  tow_sd=tow, by(terciles )


*---------------------------------------*
* TABLES
*---------------------------------------*
use "main_data_jfqa.dta", clear

global controls = "tow_m1_100 ind_articles volatility_m1_m5 abret_m1_m5 abret_m6_m30"

*-----------*
* Table 1 
*-----------*
gen high_util_dummy = (high_mean_util_mw1 == 1)
tabstat  tow_100 retw_100 disagreement num_messages high_util_dummy,  stat(n mean sd min p25 p50 p75 max) col(stat) long
tabstat disagreement if num_messages > 1,  stat(n mean sd min p25 p50 p75 max) col(stat) long
tabstat  high_diff_frac_loan_100 itrade_100 act_tow_100 exact_resid_tow_100,  stat(n mean sd min p25 p50 p75 max) col(stat) long
tabstat itrade_100 act_tow_100 exact_resid_tow_100 if sc13d_60day == 1,  stat(n mean sd min p25 p50 p75 max) col(stat) long

*-----------*
* Table 2 
*-----------*
reghdfe itrade_100 zee_disagreement itrade_m1_100 $controls if sc13d_60day == 1, absorb(permno date) cluster(permno date)
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement high_diff_frac_loan_m1_100 $controls, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement zee_num_messages high_diff_frac_loan_m1_100 $controls, absorb(permno date) cluster(permno date)

*-----------*
* Table 3 
*-----------*
reghdfe itrade_100 zee_disagreement_tech  zee_num_messages_tech itrade_m1_100 $controls if sc13d_60day == 1, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement_tech  zee_num_messages_tech high_diff_frac_loan_m1_100 $controls, absorb(permno date) cluster(permno date)

*-----------*
* Table 4
*-----------*
* Panel A  
reghdfe tow_100 zee_disagreement $controls, absorb(permno date)  cluster(permno date)
reghdfe tow_100 zee_disagreement zee_num_messages $controls, absorb(permno date)  cluster(permno date)
reghdfe tow_100 zee_disagreement zee_num_messages zee_disagreement_m1 zee_num_messages_m1 $controls, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement $controls, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages $controls, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages zee_disagreement_m1 zee_num_messages_m1 $controls, absorb(permno date)  cluster(permno date)

* Panel B 
reghdfe retw_100 zee_disagreement $controls if  high_mean_util_mw1 == 1, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages $controls if  high_mean_util_mw1 == 1, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages zee_disagreement_m1 zee_num_messages_m1 $controls if  high_mean_util_mw1 == 1, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement  $controls if high_mean_util_mw1 == 0, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages  $controls if high_mean_util_mw1 == 0, absorb(permno date)  cluster(permno date)
reghdfe retw_100 zee_disagreement zee_num_messages zee_disagreement_m1 zee_num_messages_m1 $controls if high_mean_util_mw1 == 0, absorb(permno date)  cluster(permno date)

*-----------*
* Table 5 
*-----------*
** Panel A 
* High utilization
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_util_mw1 == 1, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement zee_num_messages high_diff_frac_loan_m1_100 $controls if high_mean_util_mw1 == 1, absorb(permno date) cluster(permno date)
* High Fee
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_fee_mw1 == 1, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement zee_num_messages high_diff_frac_loan_m1_100 $controls if high_mean_fee_mw1 == 1, absorb(permno date) cluster(permno date)
** Panel B
* Low utilization
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_util_mw1 == 0, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement zee_num_messages high_diff_frac_loan_m1_100 $controls if high_mean_util_mw1 == 0, absorb(permno date) cluster(permno date)
* Low Fee
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_fee_mw1 == 0, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement zee_num_messages high_diff_frac_loan_m1_100 $controls if high_mean_fee_mw1 == 0, absorb(permno date) cluster(permno date)

** Panel C
* Test coeffcient differences between high and low utilisation
gen high = (high_mean_util_mw1 == 1) if sc13d_60day == 1
reghdfe itrade_100 c.zee_disagreement##i.high c.zee_num_messages##i.high c.itrade_m1_100##i.high c.tow_m1_100##i.high c.ind_articles##i.high c.volatility_m1_m5##i.high c.abret_m1_m5##i.high c.abret_m6_m30##i.high if sc13d_60day == 1, absorb( (permno##high) (date##high) ) cluster(permno date)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 1
scalar mean_high = r(mean)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 0
scalar mean_low = r(mean)
nlcom ( ( _b[zee_disagreement] + _b[1.high#c.zee_disagreement] ) / mean_high ) - ( _b[zee_disagreement] / mean_low )
* Test coeffcient differences between high and low fees
drop high 
gen high = (high_mean_fee_mw1 == 1) if sc13d_60day == 1
reghdfe itrade_100 c.zee_disagreement##i.high c.zee_num_messages##i.high c.itrade_m1_100##i.high c.tow_m1_100##i.high c.ind_articles##i.high c.volatility_m1_m5##i.high c.abret_m1_m5##i.high c.abret_m6_m30##i.high if sc13d_60day == 1, absorb( (permno##high) (date##high) ) cluster(permno date)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 1
scalar mean_high = r(mean)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 0
scalar mean_low = r(mean)
nlcom ( ( _b[zee_disagreement] + _b[1.high#c.zee_disagreement] ) / mean_high ) - ( _b[zee_disagreement] / mean_low )

*-----------*
* Table 6
*-----------*
** Panel A
* High utilization & Overnight
reghdfe itrade_100 zee_disagreement_night zee_num_messages_night itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_util_mw1 == 1 , absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement_night zee_num_messages_night high_diff_frac_loan_m1_100 $controls if high_mean_util_mw1 == 1, absorb(permno date) cluster(permno date)
* High Fee & Overnight
reghdfe itrade_100 zee_disagreement_night zee_num_messages_night itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_fee_mw1 == 1, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement_night zee_num_messages_night high_diff_frac_loan_m1_100 $controls if high_mean_fee_mw1 == 1, absorb(permno date) cluster(permno date)
** Panel B
* Low utilization & Overnight
reghdfe itrade_100 zee_disagreement_night zee_num_messages_night itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_util_mw1 == 0 , absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement_night zee_num_messages_night high_diff_frac_loan_m1_100 $controls if high_mean_util_mw1 == 0  , absorb(permno date) cluster(permno date)
* Low Fee & Overnight
reghdfe itrade_100 zee_disagreement_night zee_num_messages_night itrade_m1_100 $controls if sc13d_60day == 1 &  high_mean_fee_mw1 == 0, absorb(permno date) cluster(permno date)
reghdfe high_diff_frac_loan_100 zee_disagreement_night zee_num_messages_night high_diff_frac_loan_m1_100 $controls if high_mean_fee_mw1 == 0, absorb(permno date) cluster(permno date)

** Panel C
* Test coeffcient differences between high and low utilisation -- night
drop high
gen high = (high_mean_util_mw1 == 1) if sc13d_60day == 1
reghdfe itrade_100 c.zee_disagreement_night##i.high c.zee_num_messages_night##i.high c.itrade_m1_100##i.high c.tow_m1_100##i.high c.ind_articles##i.high c.volatility_m1_m5##i.high c.abret_m1_m5##i.high c.abret_m6_m30##i.high if sc13d_60day == 1, absorb( (permno##high) (date##high) ) cluster(permno date)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 1
scalar mean_high = r(mean)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 0
scalar mean_low = r(mean)
nlcom ( ( _b[zee_disagreement] + _b[1.high#c.zee_disagreement_night] ) / mean_high ) - ( _b[zee_disagreement_night] / mean_low )
* Test coeffcient differences between high and low fees -- night
drop high 
gen high = (high_mean_fee_mw1 == 1) if sc13d_60day == 1
reghdfe itrade_100 c.zee_disagreement_night##i.high c.zee_num_messages_night##i.high c.itrade_m1_100##i.high c.tow_m1_100##i.high c.ind_articles##i.high c.volatility_m1_m5##i.high c.abret_m1_m5##i.high c.abret_m6_m30##i.high if sc13d_60day == 1, absorb( (permno##high) (date##high) ) cluster(permno date)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 1
scalar mean_high = r(mean)
quietly summarize itrade_100 if sc13d_60day == 1 & high == 0
scalar mean_low = r(mean)
nlcom ( ( _b[zee_disagreement] + _b[1.high#c.zee_disagreement_night] ) / mean_high ) - ( _b[zee_disagreement_night] / mean_low )

*-----------*
* Table 7
*-----------*
reghdfe itrade_100 zee_disagreement zee_num_messages itrade_m1_100 $controls if sc13d_60day == 1, absorb(permno date) cluster(permno date)
reghdfe act_tow_100 zee_disagreement zee_num_messages act_tow_m1_100 $controls if sc13d_60day == 1 & itrade == 1, absorb(permno date) cluster(permno date)

*-----------*
* Table 8
*-----------*
reghdfe exact_resid_tow_100 zee_disagreement zee_num_messages act_tow_m1_100 $controls if sc13d_60day == 1, absorb(permno date) cluster(permno date)
reghdfe exact_resid_tow_100 zee_disagreement zee_num_messages act_tow_m1_100 $controls if sc13d_60day == 1 & itrade == 1, absorb(permno date) cluster(permno date)
reghdfe exact_resid_tow_100 zee_disagreement zee_num_messages act_tow_m1_100 $controls if sc13d_60day == 1 & itrade == 0, absorb(permno date) cluster(permno date)
